In [44]:
%load_ext sql
%sql sqlite://
Out[44]:
In [45]:
%%sql
pragma foreign_keys = ON; -- WARNING: by default off in sqlite
drop table if exists product; -- This needs to be dropped if exists, see why further down!
drop table if exists company;
create table company (
cname varchar primary key, -- company name uniquely identifies the company.
stockprice money, -- stock price is in money
country varchar); -- country is just a string
insert into company values ('GizmoWorks', 25.0, 'USA');
insert into company values ('Canon', 65.0, 'Japan');
insert into company values ('Hitachi', 15.0, 'Japan');
create table product(
pname varchar primary key, -- name of the product
price money, -- price of the product
category varchar, -- category
manufacturer varchar, -- manufacturer
foreign key (manufacturer) references company(cname));
insert into product values('Gizmo', 19.99, 'Gadgets', 'GizmoWorks');
insert into product values('SingleTouch', 149.99, 'Photography', 'Canon');
insert into product values('PowerGizmo', 29.99, 'Gadgets', 'GizmoWorks');
insert into product values('MultiTouch', 203.99, 'Household', 'Hitachi');
Out[45]:
In [46]:
%%sql
DROP TABLE IF EXISTS franchise;
CREATE TABLE franchise (name TEXT, db_type TEXT);
INSERT INTO franchise VALUES ('Bobs Bagels', 'NoSQL');
INSERT INTO franchise VALUES ('eBagel', 'NoSQL');
INSERT INTO franchise VALUES ('BAGEL CORP', 'MySQL');
DROP TABLE IF EXISTS store;
CREATE TABLE store (franchise TEXT, location TEXT);
INSERT INTO store VALUES ('Bobs Bagels', 'NYC');
INSERT INTO store VALUES ('eBagel', 'PA');
INSERT INTO store VALUES ('BAGEL CORP', 'Chicago');
INSERT INTO store VALUES ('BAGEL CORP', 'NYC');
INSERT INTO store VALUES ('BAGEL CORP', 'PA');
DROP TABLE IF EXISTS bagel;
CREATE TABLE bagel (name TEXT, price MONEY, made_by TEXT);
INSERT INTO bagel VALUES ('Plain with shmear', 1.99, 'Bobs Bagels');
INSERT INTO bagel VALUES ('Egg with shmear', 2.39, 'Bobs Bagels');
INSERT INTO bagel VALUES ('eBagel Drinkable Bagel', 27.99, 'eBagel');
INSERT INTO bagel VALUES ('eBagel Expansion Pack', 1.99, 'eBagel');
INSERT INTO bagel VALUES ('Plain with shmear', 0.99, 'BAGEL CORP');
INSERT INTO bagel VALUES ('Organic Flax-seed bagel chips', 0.99, 'BAGEL CORP');
DROP TABLE IF EXISTS purchase;
-- Note that date is an int here just to simplify things
CREATE TABLE purchase (bagel_name TEXT, franchise TEXT, date INT, quantity INT, purchaser_age INT);
INSERT INTO purchase VALUES ('Plain with shmear', 'Bobs Bagels', 1, 12, 28);
INSERT INTO purchase VALUES ('Egg with shmear', 'Bobs Bagels', 2, 6, 47);
INSERT INTO purchase VALUES ('Plain with shmear', 'BAGEL CORP', 2, 12, 24);
INSERT INTO purchase VALUES ('Plain with shmear', 'BAGEL CORP', 3, 1, 17);
INSERT INTO purchase VALUES ('eBagel Expansion Pack', 'eBagel', 1, 137, 5);
INSERT INTO purchase VALUES ('Plain with shmear', 'Bobs Bagels', 4, 24, NULL);
Out[46]:
In [47]:
%sql SELECT * FROM Product;
Out[47]:
In [48]:
%%sql SELECT pname,price FROM Product
ORDER BY pname
Out[48]:
In [49]:
%%sql SELECT pname FROM Product
ORDER BY Price
Out[49]:
In [50]:
%%sql SELECT distinct pname FROM Product
ORDER BY Price
Out[50]:
In [51]:
# Create tables & insert some random numbers
# Note: in Postgresql, try the generate_series function...
%sql DROP TABLE IF EXISTS R; DROP TABLE IF EXISTS S; DROP TABLE IF EXISTS T;
%sql CREATE TABLE R (A int); CREATE TABLE S (A int); CREATE TABLE T (A int);
for i in range(1,6):
%sql INSERT INTO R VALUES (:i)
for i in range(1,11,3):
%sql INSERT INTO T VALUES (:i)
Попробуем получить $R \cap (S \cup T) = \{1,4\}$
In [60]:
%%sql SELECT DISTINCT R.A FROM R, S, T
WHERE R.A=S.A OR R.A=T.A
Out[60]:
Почему возвращаемое множество пусто?
Посмотрим на порядок операций для данного запроса:
R,S,T
WHERE
. Выполним (1):
In [61]:
%sql SELECT DISTINCT R.A FROM R, S, T;
Out[61]:
Декартово произведение пусто, потому что S
- пусто!
In [62]:
%%sql
SELECT R.A FROM R, S WHERE R.A=S.A
UNION ALL
SELECT R.A FROM R, T WHERE R.A=T.A
Out[62]:
In [63]:
%sql DROP TABLE IF EXISTS S; CREATE TABLE S (A int);
for i in range(1,6):
%sql INSERT INTO S VALUES (:i)
In [64]:
%%sql
SELECT R.A FROM R, S WHERE R.A=S.A
UNION ALL
SELECT R.A FROM R, T WHERE R.A=T.A
Out[64]:
In [14]:
%%sql
SELECT R.A FROM R, S, T WHERE R.A = S.A
INTERSECT
SELECT R.A FROM R, S, T WHERE R.A = T.A
Out[14]:
In [15]:
%%sql
SELECT R.A FROM R, S, T WHERE R.A = S.A
EXCEPT
SELECT R.A FROM R, S, T WHERE R.A = T.A
Out[15]:
Заполним таблицы
Franchise(name TEXT, db_type TEXT)
Store(franchise TEXT, location TEXT)
Bagel(name TEXT, price MONEY, made_by TEXT)
Purchase(bagel_name TEXT, franchise TEXT, date INT, quantity INT, purchaser_age INT)
In [65]:
%%sql
SELECT franchise FROM store WHERE location = 'NYC'
UNION
SELECT franchise FROM store WHERE location = 'PA';
Out[65]:
In [17]:
%%sql
SELECT f.db_type
FROM franchise f, store s
WHERE f.name = s.franchise AND s.location = 'NYC'
INTERSECT
SELECT f.db_type
FROM franchise f, store s
WHERE f.name = s.franchise AND s.location = 'PA'
Out[17]:
Что не так
Если посмотреть на данные, то должно вернуться только "MySQL" в качестве результата:
In [18]:
%%sql
SELECT f.name, s.location, f.db_type
FROM franchise f, store s
WHERE f.name = s.franchise;
Out[18]:
Посмотрим на значения до пересечения
In [19]:
%%sql SELECT f.db_type
FROM franchise f, store s
WHERE f.name = s.franchise AND s.location = 'NYC'
Out[19]:
In [20]:
%%sql SELECT f.db_type
FROM franchise f, store s
WHERE f.name = s.franchise AND s.location = 'PA'
Out[20]:
Проблема в том, что мы сделали операцию INTERSECT
после возращение атрибутов запросов, а не до
In [71]:
%%sql
select distinct f.dbtype
from
(
select franchise
from store where location = 'NYC'
INTERSECT
SELECT franchise
from store where location = 'PA'
) A, franchise
where A.franchise = f.name
In [72]:
%%sql
SELECT f.db_type
FROM franchise f
WHERE f.name IN (
SELECT s.franchise FROM store s WHERE s.location = 'NYC')
AND f.name IN (
SELECT s.franchise FROM store s WHERE s.location = 'PA');
Out[72]:
Другой пример: eBagel's CEO хочет знать, какие бд используют компании с возрастом покупателей от 20 до 30:
In [73]:
%%sql
SELECT f.db_type
FROM franchise f
WHERE f.name IN (
SELECT b.made_by
FROM bagel b, purchase p
WHERE b.name = p.bagel_name
AND p.purchaser_age >= 20 AND p.purchaser_age < 30);
Out[73]:
Можно ли обойтись без вложенного запроса?
In [76]:
%%sql
SELECT f.db_type
FROM franchise f, bagel b, purchase p
WHERE f.name = b.made_by
AND b.name = p.bagel_name
AND p.purchaser_age >= 20 AND p.purchaser_age < 30;
Out[76]:
Обращайте внимание на дубли! используйте DISTINCT
.
Также можно использовать:
ALL
ANY
EXISTS
К сожалению, ALL
и ANY
не поддерживаются SQLite Покажем пример с EXISTS
. Предположим, что мы хотим ответить на вопрос: у eBagel есть какие-нибудь products, которые дешевле всех продуктов конкурентов?
In [77]:
%%sql
SELECT b.name, b.price
FROM bagel b
WHERE b.made_by = 'eBagel'
AND EXISTS (SELECT name FROM bagel WHERE made_by <> 'eBagel' AND price > b.price);
Out[77]:
Примеры
In [25]:
%sql SELECT AVG(price) FROM bagel WHERE made_by = 'eBagel';
Out[25]:
In [26]:
%sql SELECT COUNT(*) AS "Number of Stores in PA" FROM store WHERE location = 'PA';
Out[26]:
In [27]:
%sql SELECT COUNT(location) FROM store;
Out[27]:
In [28]:
%sql SELECT COUNT(DISTINCT location) FROM store;
Out[28]:
Можно ли получить общую сумму, заработанную компаниями?
In [29]:
%%sql
SELECT SUM(b.price * p.quantity) AS net_sales
FROM bagel b, purchase p
WHERE b.name = p.bagel_name;
Out[29]:
Информацию можно детализировать!
In [80]:
%%sql
SELECT b.made_by, SUM(b.price * p.quantity) AS revenue
FROM bagel b, purchase p
WHERE b.made_by = p.franchise AND b.name = p.bagel_name
GROUP BY b.made_by;
Out[80]:
Найдем только те компании, количество продаж которых больше 12.
In [79]:
%%sql
SELECT b.name, SUM(b.price * p.quantity) AS sales
FROM bagel b, purchase p
WHERE b.name = p.bagel_name AND b.made_by = p.franchise
GROUP BY b.name
HAVING SUM(p.quantity) > 12;
Out[79]:
Посмотрим на предыдущий запрос детально Сформируем SFW
In [32]:
%%sql
SELECT *
FROM bagel b, purchase p
WHERE b.name = p.bagel_name AND b.made_by = p.franchise;
Out[32]:
Применим GROUP_BY
In [33]:
%%sql
SELECT
b.name,
GROUP_CONCAT(b.price, ',') AS prices,
GROUP_CONCAT(b.made_by, ',') AS made_bys,
bagel_name,
GROUP_CONCAT(p.franchise, ',') AS franchises,
GROUP_CONCAT(p.date, ',') AS dates,
GROUP_CONCAT(p.quantity, ',') AS quantities,
GROUP_CONCAT(p.purchaser_age, ',') AS purchaser_ages
FROM bagel b, purchase p
WHERE b.name = p.bagel_name AND b.made_by = p.franchise
GROUP BY b.name;
Out[33]:
Group_concat показывает сгенерированную строку
Применим Having;
In [34]:
%%sql
SELECT
b.name,
GROUP_CONCAT(b.price, ',') AS prices,
GROUP_CONCAT(b.made_by, ',') AS made_bys,
bagel_name,
GROUP_CONCAT(p.franchise, ',') AS franchises,
GROUP_CONCAT(p.date, ',') AS dates,
SUM(p.quantity) AS total_quantity,
GROUP_CONCAT(p.purchaser_age, ',') AS purchaser_ages
FROM bagel b, purchase p
WHERE b.name = p.bagel_name AND b.made_by = p.franchise
GROUP BY b.name
HAVING SUM(p.quantity) > 12;
Out[34]:
Выполним проекцию.
In [35]:
%%sql
SELECT b.name, SUM(b.price * p.quantity) AS sales
FROM bagel b, purchase p
WHERE b.name = p.bagel_name AND b.made_by = p.franchise
GROUP BY b.name
HAVING SUM(p.quantity) > 12;
Out[35]:
In [36]:
%sql SELECT DISTINCT made_by FROM bagel WHERE name LIKE '%shmear%';
Out[36]:
Найдем компании, у которых все продукты имеют shmear в названии:
In [37]:
%%sql
SELECT DISTINCT made_by
FROM bagel
WHERE made_by NOT IN (
SELECT made_by
FROM bagel
WHERE name NOT LIKE '%shmear%');
Out[37]:
In [38]:
%sql SELECT * FROM purchase WHERE bagel_name LIKE '%shmear%';
Out[38]:
In [39]:
%%sql SELECT * FROM purchase
WHERE bagel_name LIKE '%shmear%'
AND (purchaser_age >= 5 OR purchaser_age < 5);
Out[39]:
Применяя условия, необходимо корректно обрабатывать условия на NULL:
In [40]:
%%sql SELECT * FROM purchase
WHERE bagel_name LIKE '%shmear%'
AND (purchaser_age >= 5 OR purchaser_age < 5
OR purchaser_age IS NULL);
Out[40]:
Что произойдет если есть null в join'е?
In [41]:
%%sql
SELECT DISTINCT b.name
FROM bagel b, purchase p
WHERE b.name = p.bagel_name AND b.made_by = p.franchise;
Out[41]:
Пропускаем пончики, которые никто не покупал
In [42]:
%%sql
SELECT DISTINCT b.name
FROM bagel b
INNER JOIN purchase p ON b.name = p.bagel_name AND b.made_by = p.franchise;
Out[42]:
INNER JOIN
на таблицыA
и B
с условием на соединение C(A,B)
возвращает только такие отношения (a,b)
, для которых C(a,b) = TRUE
.
LEFT OUTER JOIN
. возвращает также (a, NULL)
для тех кортежей a
, для которых нет b
таких, что C(a,b) = TRUE
:
In [43]:
%%sql
SELECT DISTINCT b.name
FROM bagel b
LEFT OUTER JOIN purchase p ON b.name = p.bagel_name AND b.made_by = p.franchise;
Out[43]:
In [ ]: